import os
import re
import time

import requests
import js2py
from bs4 import BeautifulSoup
# Browser-like default headers sent with every request.
# Bug fix: the original used the key "referrer", which is not a real HTTP
# header name — servers look for "Referer" (the historical misspelling is
# the standard), so the intended referrer was never recognized.
headers = {
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
    "upgrade-insecure-requests": "1",
    "Referer": "http://www.pbc.gov.cn/goutongjiaoliu/113456/113469/5127140/index.html",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0",
}
# Module-level session shared by all functions below: the default headers are
# attached to every request, and cookies set by responses (the anti-bot
# challenge) persist across subsequent calls.
session=requests.Session()
session.headers.update(headers)
def get_var():
    """Fetch the listing index page and extract the inline challenge JS.

    The site's first <script> tag contains a variable block (starting at
    ``var Q`` and ending with ``'utf-8';``) that the companion ``test.js``
    needs in order to compute the cookie-setting path.

    Returns:
        str: the JavaScript variable-definition snippet.

    Raises:
        ValueError: if either marker is missing from the page — the original
            code silently sliced garbage in that case.
    """
    index_url = "http://www.pbc.gov.cn/goutongjiaoliu/113456/113469/index.html"
    html = session.get(url=index_url).content.decode()
    soup = BeautifulSoup(html, features="lxml")
    script = soup.find(name="script").text
    start = script.find("var Q")
    if start == -1:
        # str.find returns -1 on a miss; script[-1:] would be one junk char.
        raise ValueError("challenge script: 'var Q' marker not found")
    var_data = script[start:]
    end = var_data.find("'utf-8';")
    if end == -1:
        raise ValueError("challenge script: \"'utf-8';\" terminator not found")
    var_data = var_data[:end + 8]  # +8 keeps the terminator itself
    print(var_data)
    return var_data


def get_K():
  """Run the anti-bot JavaScript and return the cookie-setting path K.

  Concatenates the page-specific variables from get_var() with the static
  algorithm in the local ``test.js``, evaluates the result with js2py, and
  calls the JS function ``D()`` to obtain the path.

  Returns:
      The value produced by the JS function D() (the challenge path).
  """
  ctx = js2py.EvalJs()
  # `with` guarantees the file handle is closed (the original leaked it).
  with open('test.js', 'r', encoding='utf-8') as fh:
    js_code = get_var() + "\n" + fh.read()
  ctx.execute(js_code)
  # Call D() exactly once; the original invoked it a second time just to
  # print, re-running the challenge computation.
  K = ctx.D()
  print("K:", K)
  return K

def get_news_list(page=1):
  """Scrape one listing page and return its articles.

  First requests the challenge path K so the shared session acquires the
  anti-bot cookie, then fetches and parses the listing page.

  Args:
      page: 1-based listing page number.

  Returns:
      list[dict]: items with keys ``"title"`` and ``"href"`` (absolute URL).

  Raises:
      ValueError: if the listing container div is missing, which usually
          means the cookie challenge failed.
  """
  K = get_K()
  challenge_url = f"http://www.pbc.gov.cn{K}"
  print(challenge_url)
  # This request is what sets the anti-bot cookie on the session.
  print(session.get(challenge_url))
  index_url = f"http://www.pbc.gov.cn/goutongjiaoliu/113456/113469/11040/index{page}.html"
  content = session.get(index_url).content.decode()
  soup = BeautifulSoup(content, features="lxml")
  div = soup.find(name="div", attrs={"class": "mainw950"})
  if div is None:
    # Fail with a clear message instead of an AttributeError below.
    raise ValueError("listing container 'mainw950' not found; challenge may have failed")
  news_list = []
  for font in div.find_all(name="font"):
    a = font.find(name="a")
    # Not every <font> wraps a link — skip those explicitly instead of the
    # original bare `except: continue`, which also hid real bugs.
    if a is None or not a.has_attr("href"):
      continue
    title = a.text
    href = "http://www.pbc.gov.cn" + a["href"]
    news_list.append({"title": title, "href": href})
    print(title, href)
  return news_list
  
def download_news(page=1):
   """Download every article on listing page *page* into ``./news/``.

   Each article body (the div with id="zoom") is written as a UTF-8 text
   file named after a sanitized version of the article title.  Sleeps
   between downloads to avoid hammering the server.

   Args:
       page: 1-based listing page number to download.
   """
   # Create the output directory up front; the original crashed with
   # FileNotFoundError if ./news did not already exist.
   os.makedirs("./news", exist_ok=True)
   news_list = get_news_list(page)
   for news in news_list:
      req = session.get(news["href"])
      if req.status_code != 200:
         continue
      soup = BeautifulSoup(req.content.decode(), features="lxml")
      div = soup.find(name="div", attrs={"id": "zoom"})
      if div is None:
         # Some notices have no zoom div (e.g. image-only pages); skip them
         # instead of raising AttributeError on div.text.
         continue
      # Titles can contain characters that are illegal in file names
      # (/, :, ?, ...); replace them so open() cannot fail or escape ./news.
      filename = re.sub(r'[\\/:*?"<>|]', "_", news["title"])
      with open(f"./news/{filename}", "w", encoding="utf-8") as out:
         out.write(div.text)
      # Throttle outside the `with` so the file is closed promptly;
      # the redundant f.close() inside the original `with` is gone.
      time.sleep(1.5)
      print("下载完成")
            
if __name__ == "__main__":
    # Download all news articles from page 2 of the listing.
    download_news(2)
  



